max_steps_per_episode = 200
ENV_NAME = 'CartPole-v0'
# List of environments in Gym
# https://github.com/openai/gym/wiki/Table-of-environments
# https://gym.openai.com/envs/#classic_control
import os
# Work around the "duplicate OpenMP runtime" error (OMP Error #15) that
# conda-installed libraries can trigger on Windows
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
import numpy as np
import matplotlib.pyplot as plt
import gym
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
def display_frames_as_gif(frames):
    """Animate a list of RGB frames, save the result as a GIF, and return the animation."""
    dpi = 200
    fig = plt.figure(figsize=(frames[0].shape[1]/dpi, frames[0].shape[0]/dpi),
                     dpi=dpi)
    patch = plt.imshow(frames[0])
    plt.axis('off')

    def animate(i):
        patch.set_data(frames[i])

    anim = FuncAnimation(plt.gcf(), animate, frames=len(frames),
                         interval=10, repeat=False)
    print('Saving images into an animation gif file ...')
    anim.save('movie_{}_random.gif'.format(ENV_NAME), writer='pillow')
    print('Done.')
    return anim
frames = []
n_trials = 5
env = gym.make(ENV_NAME)
for i in range(n_trials):
    observation = env.reset()  # Reset environment at the start of each trial
    for _ in range(max_steps_per_episode):
        frames.append(env.render(mode='rgb_array'))  # add a snapshot image to frames
        action = np.random.choice(2)  # 0: push cart to the left, 1: push cart to the right
        observation, reward, done, info = env.step(action)  # execute the action
        if done:  # stop stepping once the episode ends; step() after done is undefined
            break
env.close()
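# Aside: the loop above targets the classic Gym API (gym <= 0.25), where
# step() returns four values. Under gym >= 0.26 the API changed: reset()
# returns (observation, info), step() returns five values, and the render
# mode is fixed when the environment is created. A minimal sketch of the
# equivalent loop under the newer API, for reference only (not run here):
#
#   env = gym.make(ENV_NAME, render_mode='rgb_array')
#   observation, info = env.reset()
#   for _ in range(max_steps_per_episode):
#       frames.append(env.render())
#       action = np.random.choice(2)
#       observation, reward, terminated, truncated, info = env.step(action)
#       if terminated or truncated:
#           break
#   env.close()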
%%capture
plt.rcParams["animation.html"] = "jshtml"
%time an = display_frames_as_gif(frames)
ret = an.to_jshtml()
HTML(ret) # should be placed in another cell
import rl.callbacks
import time

class ViewLogger(rl.callbacks.Callback):
    """keras-rl callback that records a rendered frame after every action."""

    def __init__(self):
        self.frames = []

    def on_action_end(self, action, logs):
        # self.env is attached to the callback by keras-rl before the run starts
        self.frames.append(self.env.render(mode='rgb_array'))

    def view(self, interval=5, start_frame=0, end_frame=0):
        assert start_frame < len(self.frames)
        self.start_frame = start_frame
        if end_frame == 0 or end_frame > len(self.frames):
            end_frame = len(self.frames)
        self.t0 = time.time()
        # display size
        plt.figure(figsize=(2.5, 2.5), dpi=200)
        self.patch = plt.imshow(self.frames[0])
        plt.axis('off')
        anim = FuncAnimation(plt.gcf(), self._plot,
                             frames=end_frame-start_frame, interval=interval,
                             repeat=False)
        print('Saving images into an animation gif file ...')
        anim.save('movie_{}.gif'.format(ENV_NAME), writer='pillow')
        print('Done.')
        return anim.to_jshtml()

    def _plot(self, frame):
        if frame % max_steps_per_episode == 0:
            # report progress (frame index and elapsed minutes) once per episode
            print("{}f {}m".format(frame, (time.time()-self.t0)/60))
        self.patch.set_data(self.frames[frame+self.start_frame])
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
env = gym.make(ENV_NAME)  # create the environment before seeding it
np.random.seed(111)
env.seed(111)
[111]
nb_actions = env.action_space.n
nb_observ = env.observation_space.shape
print('Number of dimensions for observation:', nb_observ)
print('Number of dimensions for action:', nb_actions)
Number of dimensions for observation: (4,)
Number of dimensions for action: 2
model = Sequential()
model.add(Flatten(input_shape=(1,) + nb_observ))
model.add(Dense(16, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(nb_actions, activation='linear'))
model.summary()  # summary() prints the table itself; wrapping it in print() adds a stray "None"
Model: "sequential_1" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= flatten_1 (Flatten) (None, 4) 0 _________________________________________________________________ dense_4 (Dense) (None, 16) 80 _________________________________________________________________ dense_5 (Dense) (None, 16) 272 _________________________________________________________________ dense_6 (Dense) (None, 16) 272 _________________________________________________________________ dense_7 (Dense) (None, 2) 34 ================================================================= Total params: 658 Trainable params: 658 Non-trainable params: 0 _________________________________________________________________ None
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
target_model_update=1e-2, policy=policy)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])  # 'lr' is a deprecated alias in TF 2.x
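# For intuition: BoltzmannQPolicy samples actions with probability
# proportional to exp(Q / tau) instead of always taking the greedy action,
# so exploration shrinks naturally as the Q-value gap grows. A minimal,
# illustrative sketch of that sampling rule (tau=1.0 matches keras-rl's
# default temperature; this helper is not part of the library):
def boltzmann_action(q_values, tau=1.0):
    q = np.asarray(q_values, dtype=np.float64) / tau
    probs = np.exp(q - q.max())   # subtract the max for numerical stability
    probs /= probs.sum()
    return np.random.choice(len(probs), p=probs)

boltzmann_action([1.2, 0.8])  # returns 0 or 1, favoring the larger Q-value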
%%time
# If visualize is set to True, an animation is shown during training,
# but this slows training down considerably.
fit_log = dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)
#fit_log = dqn.fit(env, nb_steps=50000, visualize=True, verbose=1)
Training for 50000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 21:49 - reward: 1.0000
C:\Users\shugo\AppData\Roaming\Python\Python38\site-packages\rl\memory.py:40: UserWarning: Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!
  warnings.warn('Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!')
10000/10000 [==============================] - 86s 9ms/step - reward: 1.0000
101 episodes - episode_reward: 98.624 [9.000, 200.000] - loss: 2.306 - mae: 19.631 - mean_q: 39.816

Interval 2 (10000 steps performed)
10000/10000 [==============================] - 88s 9ms/step - reward: 1.0000
51 episodes - episode_reward: 195.804 [165.000, 200.000] - loss: 6.681 - mae: 41.563 - mean_q: 83.836

Interval 3 (20000 steps performed)
10000/10000 [==============================] - 90s 9ms/step - reward: 1.0000
52 episodes - episode_reward: 193.269 [146.000, 200.000] - loss: 7.642 - mae: 45.220 - mean_q: 90.812

Interval 4 (30000 steps performed)
10000/10000 [==============================] - 93s 9ms/step - reward: 1.0000
51 episodes - episode_reward: 195.196 [147.000, 200.000] - loss: 8.094 - mae: 43.434 - mean_q: 87.025

Interval 5 (40000 steps performed)
10000/10000 [==============================] - 96s 10ms/step - reward: 1.0000
done, took 452.663 seconds
Wall time: 7min 32s
weight_file = 'dqn_{}_weights.h5f'.format(ENV_NAME)
dqn.save_weights(weight_file, overwrite=True)
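# The saved weights can later be restored into an identically built and
# compiled agent, so evaluation does not require retraining. A minimal
# sketch (assumes model, memory, policy, and dqn are constructed exactly
# as above):
#
#   dqn.load_weights(weight_file)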
plt.plot(fit_log.history['episode_reward'], label='reward')
plt.xlabel('Episodes')
plt.ylabel('Rewards')
plt.legend()
plt.show()
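# The per-episode rewards are noisy, so a moving average makes the learning
# trend easier to read. A small sketch (the window size of 10 is arbitrary):
rewards = np.asarray(fit_log.history['episode_reward'])
window = 10
smoothed = np.convolve(rewards, np.ones(window) / window, mode='valid')
plt.plot(rewards, alpha=0.3, label='reward')
plt.plot(np.arange(window - 1, len(rewards)), smoothed,
         label='{}-episode mean'.format(window))
plt.xlabel('Episodes')
plt.ylabel('Rewards')
plt.legend()
plt.show()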
%%time
n_trials = 5
view = ViewLogger()
test_log = dqn.test(env, nb_episodes=n_trials, visualize=False, callbacks=[view])
Testing for 5 episodes ...
Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200
Wall time: 18.3 s
%%time
ret = view.view(interval=10)
Saving images into an animation gif file ...
0f 0.0036769390106201174m
0f 0.003810326258341471m
200f 0.16015223662058511m
400f 0.3169355551401774m
600f 0.5126023888587952m
800f 0.6742190440495809m
Done.
0f 0.9601522167523702m
0f 0.9602189064025879m
200f 1.1445528467496235m
400f 1.3286527911822001m
600f 1.514585554599762m
800f 1.7003355344136557m
Wall time: 1min 53s
HTML(ret) # should be placed in another cell
env.close()